Developer CD Series 1996 May: Tool Chest

home *** CD-ROM | disk | FTP | other *** search

/ Developer CD Series 1996 May: Tool Chest / Developer CD Series May 1996 (Tool Chest) (Apple Computer) (1996).iso / Tool Chest / Development Tools & Languages / Dylan Related / Mindy / Mindy 1.2 - portable sources / libraries / string-ext / interface.dylan < prev next >

Wrap

Text File | 1995-03-15 | 15.0 KB | 443 lines | [TEXT/ttxt]

module: regular-expressions author: Nick Kramer (nkramer@cs.cmu.edu) synopsis: This provides a useable interface for users. Functions defined outside this file are really too strange and quirky to be of use to people. copyright: Copyright (C) 1994, Carnegie Mellon University. All rights reserved. rcs-header: $Header: interface.dylan,v 1.1 94/11/08 22:56:51 nkramer Exp $ //====================================================================== // // Copyright (c) 1994 Carnegie Mellon University // All rights reserved. // // Use and copying of this software and preparation of derivative // works based on this software are permitted, including commercial // use, provided that the following conditions are observed: // // 1. This copyright notice must be retained in full on any copies // and on appropriate parts of any derivative works. // 2. Documentation (paper or online) accompanying any system that // incorporates this software, or any part of it, must acknowledge // the contribution of the Gwydion Project at Carnegie Mellon // University. // // This software is made available "as is". Neither the authors nor // Carnegie Mellon University make any warranty about the software, // its performance, or its conformity to any specification. // // Bug reports, questions, comments, and suggestions should be sent by // E-mail to the Internet address "gwydion-bugs@cs.cmu.edu". // //====================================================================== // Functions that aren't exported are marked as such. Everything else // is exported. // Find the position of a regular expression inside a string. If the // regexp is not found, return #f, otherwise return a variable number // of marks. // define open generic regexp-position (big :: <string>, regexp :: <string>, #key start:, end:, case-sensitive:) => (regexp-start :: <integer?>, #rest marks :: <integer?>); define method regexp-position (big :: <string>, regexp :: <string>, #key start: big-start = 0, end: big-end = #f, case-sensitive = #f) => (regexp-start :: <integer?>, #rest marks :: <integer?>); let substring = subsequence(big, start: big-start, end: if (big-end) big-end else size(big) end); let comparison = if (case-sensitive) \= else case-insensitive-equal end; let char-set-class = if (case-sensitive) <case-sensitive-character-set>; else <case-insensitive-character-set>; end if; let (parsed-regexp, last-group) = parse(regexp, char-set-class); let (matched, marks) = if (is-anchored?(parsed-regexp)) anchored-match-root?(parsed-regexp, substring, comparison, last-group + 1); else match-root?(parsed-regexp, substring, comparison, last-group + 1); end if; if (matched) apply(values, adjust-marks!(marks, big-start)); else #f end if; end method regexp-position; // Returns an appropriate matcher function that acts just like // regexp-position curried. If the user can deal with a positioner // that works only on byte strings and doesn't return any marks, and // doesn't mind the extra wait while the thing compiles, he can get a // positioner that executes considerably faster. // define open generic make-regexp-positioner (regexp :: <string>, #key byte-characters-only:, need-marks:, maximum-compile:, case-sensitive:) => regexp-positioner :: <function>; define method make-regexp-positioner (regexp :: <string>, #key byte-characters-only = #f, need-marks = #t, maximum-compile = #f, case-sensitive = #f) => regexp-positioner :: <function>; let comparison = if (case-sensitive) \= else case-insensitive-equal end; let char-set-class = if (case-sensitive) <case-sensitive-character-set>; else <case-insensitive-character-set>; end if; let (parsed-regexp, last-group, has-backrefs, alternatives, quantifiers) = parse(regexp, char-set-class); let match-root-function = if (is-anchored?(parsed-regexp)) anchored-match-root? else match-root?; end if; if (~maximum-compile | has-backrefs | ~byte-characters-only | need-marks) method (big :: <string>, #key start: big-start = 0, end: big-end = #f) => (regexp-start :: <integer?>, #rest marks :: <integer?>); let substring = subsequence(big, start: big-start, end: if (big-end) big-end else size(big) end); let (matched, marks) = match-root-function(parsed-regexp, substring, comparison, last-group + 1); if (matched) apply(values, adjust-marks!(marks, big-start)); else #f end if; end method; else let (nfa-begin, nfa-end) = build-nfa(parsed-regexp); // Now modify the DFA to accomodate a substring match rather // than matching the entire string let dot = make(<set-atom>, set: any-char); let star = make(<e-state>, next-state: dot, other-next-state: nfa-begin); dot.next-state := star; let dfa = nfa-to-dfa(star, nfa-end, comparison); method (big :: <string>, #key start: big-start = 0, end: big-end = #f) => answer :: <boolean>; let substring = subsequence(big, start: big-start, end: if (big-end) big-end else size(big) end); sim-dfa(dfa, substring); end method; end if; end method make-regexp-positioner; // A method used by regexp-position and make-regexp-positioner. Just // adds offset to every element of the sequence that isn't #f. There // isn't any actual mutation, but just in case someone wants to // rewrite it to involve mutation... // define method adjust-marks! (marks :: <sequence>, offset :: <integer>) => adjusted-marks :: <sequence>; map(method (a-mark :: <integer?>) if (a-mark = #f) #f else (a-mark + offset) end; end method, marks); end method adjust-marks!; define open generic regexp-replace (input :: <string>, regexp :: <string>, new-substring :: <string>, #key count:, case-sensitive:, start:, end:) => changed-string :: <string>; define method regexp-replace (input :: <string>, regexp :: <string>, new-substring :: <string>, #key count = #f, case-sensitive = #f, start = 0, end: input-end = #f) => changed-string :: <string>; let positioner = make-regexp-positioner(regexp, case-sensitive: case-sensitive); do-replacement(positioner, new-substring, input, start, input-end, count, #t); end method regexp-replace; define open generic make-regexp-replacer (regexp :: <string>, #key replace-with:, case-sensitive:) => replacer :: <function>; define method make-regexp-replacer (regexp :: <string>, #key replace-with, case-sensitive = #f) => replacer :: <function>; let positioner = make-regexp-positioner(regexp, case-sensitive: case-sensitive); if (replace-with) method (input :: <string>, #key count: count, start = 0, end: input-end = #f) => string :: <string>; do-replacement(positioner, replace-with, input, start, input-end, count, #t); end method; else method (input :: <string>, new-substring :: <string>, #key count = #f, case-sensitive = #f, start = 0, end: input-end = #f) => string :: <string>; do-replacement(positioner, new-substring, input, start, input-end, count, #t); end method; end if; end method make-regexp-replacer; // equivalent of Perl's tr. Does a character by character translation. // define open generic translate (input :: <string>, from-set :: <string>, to-set :: <string>, #key delete:, start:, end:) => output :: <string>; //The existing methods only work on byte-strings. // define method translate (input :: <byte-string>, from-set :: <byte-string>, to-set :: <byte-string>, #key delete: delete = #f, start = 0, end: input-end = #f) => output :: <byte-string>; let table = make-translation-table(from-set, to-set, delete: delete); run-translator(input, table, start, input-end | size(input)); end method translate; define open generic make-translator (from-set :: <string>, to-set :: <string>, #key delete:) => translator :: <function>; // Again, only byte-strings handled here // define method make-translator (from-set :: <byte-string>, to-set :: <byte-string>, #key delete: delete = #f) => translator :: <function>; let table = make-translation-table(from-set, to-set, delete: delete); method (input :: <byte-string>, #key start = 0, end: input-end = #f) => output :: <byte-string>; run-translator(input, table, start, input-end | size(input)); end method; end method make-translator; // Used by translate. Not exported. // define method make-translation-table (from-set :: <byte-string>, to-set :: <byte-string>, #key delete: delete = #f) => table :: <byte-character-table>; let from-index = 0; let to-index = 0; let previous-from = #f; let previous-to = #f; // These local methods are identical except for the // choice of variable names and next-from-character signals end of // string rather than repeating the last character indefinitely like // next-to-character does. local method next-from-character () if (from-index >= size(from-set)) #f; elseif (from-set[from-index] = '\\') from-index := from-index + 2; previous-from := from-set[from-index - 1]; elseif (from-set[from-index] = '-') if (previous-from = from-set[from-index + 1]) from-index := from-index + 1; from-set[from-index]; else previous-from := successor(previous-from); // and return that value end if; else from-index := from-index + 1; previous-from := from-set[from-index - 1]; end if; end method next-from-character; local method next-to-character () if (to-index >= size(to-set)) if (delete) #f else last(to-set) end; elseif (to-set[to-index] = '\\') to-index := to-index + 2; previous-to := to-set[to-index - 1]; elseif (to-set[to-index] = '-') if (previous-to = to-set[to-index + 1]) to-index := to-index + 1; to-set[to-index]; else previous-to := successor(previous-to); // and return that value end if; else to-index := to-index + 1; previous-to := to-set[to-index - 1]; end if; end method next-to-character; let table = make(<byte-character-table>); for (dummy keyed-by c in table) // Initialize table table[c] := c; end for; for (from-char = next-from-character() then next-from-character(), to-char = next-to-character() then next-to-character(), until from-char = #f) table[from-char] := to-char; end for; table; end method make-translation-table; // Used by translate. Not exported. // define method run-translator (source :: <byte-string>, table :: <byte-character-table>, start-index :: <integer>, end-index :: <integer>) => output :: <byte-string>; let dest-string = copy-sequence(source); let dest-index = start-index; for (source-index from start-index below end-index) let char = source[source-index]; if (table[char] ~= #f) dest-string[dest-index] := table[char]; dest-index := dest-index + 1; end if; end for; // Now resize dest-string, because deleting characters in the // translation would make dest-string shorter than we've // allocated. if (dest-index = end-index) dest-string; else replace-subsequence!(dest-string, "", start: dest-index, end: end-index); end if; end method run-translator; // Like Perl's split function // define open generic split (pattern :: <string>, input :: <string>, #key count:, remove-empty-items:, start:, end:); // => #rest whole-bunch-of-strings :: <string>; define method split (pattern :: <string>, input :: <string>, #key count = #f, remove-empty-items = #t, start = 0, end: input-end = #f) => #rest whole-bunch-of-strings :: <string>; let positioner = make-regexp-positioner(pattern); split-string(positioner, input, start, input-end | size(input), count, remove-empty-items); end method split; define open generic make-splitter (pattern :: <string>) => splitter :: <function>; define method make-splitter (pattern :: <string>) => splitter :: <function>; let positioner = make-regexp-positioner(pattern); method (string :: <string>, #key count = #f, remove-empty-items = #t, start = 0, end: input-end = #f) => #rest whole-bunch-of-strings :: <string>; split-string(positioner, string, start, input-end | size(string), count, remove-empty-items); end method; end method make-splitter; // Used by split. Not exported. // define method split-string (positioner :: <function>, input :: <string>, start :: <integer>, input-end :: <integer>, count :: <integer?>, remove-empty-items :: <object>) => #rest whole-bunch-of-strings :: <string>; let strings = make(<deque>); block (done) let end-of-last-match = 0; let start-of-where-to-look = start; let string-number = 1; // Since count: starts at 1, so // should string-number while (#t) let (substring-start, substring-end) = positioner(input, start: start-of-where-to-look, end: input-end); if (~substring-start | (count & (count <= string-number))) strings := push-last(strings, copy-sequence(input, start: end-of-last-match)); done(); elseif ((substring-start = start-of-where-to-look) & remove-empty-items) // delimited item is empty end-of-last-match := substring-end; start-of-where-to-look := end-of-last-match; else strings := push-last(strings, copy-sequence(input, start: end-of-last-match, end: substring-start)); string-number := string-number + 1; end-of-last-match := substring-end; start-of-where-to-look := end-of-last-match; end if; end while; end block; apply(values, strings); end method split-string; // join--like Perl's join // define open generic join (delimiter :: <string>, #rest strings) => big-string :: <string>; // This is not really any more efficient than concatenate-as, but it's // more convenient. // define method join (delimiter :: <string>, #rest strings) => big-string :: <string>; let length = max(0, (size(strings) - 1 ) * size(delimiter)); for (string in strings) length := length + size(string); end for; let big-string = make(<unicode-string>, size: length); let big-index = 0; for (i from 0 to size(strings) - 2) // Don't iterate over the last string let string = strings[i]; let new-index = big-index + size(string); big-string := replace-subsequence!(big-string, string, start: big-index, end: new-index); big-index := new-index; let new-index = big-index + size(delimiter); big-string := replace-subsequence!(big-string, delimiter, start: big-index, end: new-index); big-index := new-index; end for; if (size(strings) > 0) big-string := replace-subsequence!(big-string, last(strings), start: big-index, end: size(big-string)); end if; big-string; end method join;